clear all
set more off, perm
local mydir "`1'"
global Tables	`mydir'/XS/Tables
global Work	`mydir'/XS/DataWork
global Raw	`mydir'/XS/DataRaw
global WRDS	`mydir'/WRDS
/* ********************************************************************* */
* [0] add davis data on book value (does not have datadate, gvkey, etc)
* In practice, the number of firms with data on book in the davis dataset is neglegible
import excel "$Raw/Davis Book Equity.xls", sheet("Davis Book Equity") cellrange(A2:CA1796) firstrow clear
rename *, lower
drop firstyr lastyr

reshape long book, i(permno) j(yr)
replace book=. if book==-99.99

tempfile davis
save "`davis'", replace
/* *********************************** */
* add permco
import sas $WRDS/CRSP/msf.sas7bdat, clear
rename *, lower

keep if month(date)==6 

gen yr=year(date)
keep yr permco permno

merge 1:1 permno yr using "`davis'"
drop _merge

tsset permno yr
replace permco=L.permco if permco==.
keep if permco+permno+book<.
save "`davis'", replace
/* ********************************************************************* */
* [1] LOAD ANNUAL COMPUSTAT (crsp_a_ccm.dta.gz obtained from wrds on 6/5/2022; it includes linktype,"LS" & linkprim, "J"
gzuse using $WRDS/Compustat/crsp_a_ccm_a.dta.gz, clear  
count 
rename *, lower
rename lpermco permco
rename lpermno permno
* deal with crsp-compustat code for missing values
ds, not(type string)  
foreach var in `r(varlist)' {
    replace `var' = . if `var' == 0.0001 | `var' == 0.0002 | `var' == 0.0003 | `var' == 0.0004 | `var' == 0.0008 
}
* Only data in USD
keep if index(curcd, "USD")>0

* sanity check --it does not actually drop any observations:
keep if index(datafmt ,"STD") & index(consol,"C") & index(popsrc,"D") & index(indfmt,"INDL")

* linktype
* LC – Link research complete (after extensive research by CRSP). Standard connection between databases.
* LU – Link is unresearched by CRSP. It is established by comparing the Compustat and historical CRSP CUSIPs. LU represents the most popular link type.
* LS – Link valid for this security only. Other CRSP PERMNOs with the same PERMCO will link to other GVKEYs.
keep if (index(linktype, "LU")>0) |  (index(linktype,"LC")>0) | (index(linktype,"LS")>0)
keep if (index(linktype, "LU")>0) |  (index(linktype,"LC")>0) 

* LINKPRIM:
*P = Primary, identified by Compustat in monthly security data.
*C = Primary, assigned by CRSP to resolve ranges of overlapping or missing primary markers from Compustat in order to produce one primary security throughout the company history.
*J = Joiner secondary issue of a company, identified by Compustat in monthly security data.
*N = Secondary, assigned by CRSP to override Compustat. 
*keep if (index(linkprim,  "P")>0) | (index(linkprim, "C")>0) | (index(linkprim, "J")>0)
 keep if (index(linkprim,  "P")>0) | (index(linkprim, "C")>0) 

*Ken uses Income before extraordinary items net of dividends on preferred
generate nib=ib
replace  nib=nib-dvp if dvp<. & dvp>0

gen sps=sale/(adjex_f*csho)
gen eps= nib/(adjex_f*csho)
gen csho_adjex=adjex_f*csho
/* ************************* */
* "street" earnings
* Hillebrand and McCarthy (2021): "...the line items niq, xidoq, spiq and nrtxtq in compustat fundq correspond to net income, extraordinary itmes
* and discontinued operations, special items and non-recurring income taxes, respectively. We then compute it as "compustat net income excluding non-IBES items" = niq - xidoq - spiq - nrtxtq 
* where we sum up the quarterly earnings over the past four quarters to obtain compustat net income excluding non-IBES items on a yearly basis."
* start with earnings before extraordinary items
generate street=ib
* remove spiq: discontinued operations, special items
replace street = street-spi 	if spi<.
* remove nrtxtq: non-recurring income taxes
replace street = street- nrtxt if nrtxt<.
* ********************
keep conm* gvkey permco permno datadate ipodate fyr linkprim liid linktype at ceq cogs xint xsga csho ib nib eps itcb lt ni pstk* seq txdb txditc sale sps mibt street csho_adjex
compress
/* ************************* */
generate steq=seq					/* Stockholders Equity */
replace  steq=ceq+pstk			if steq==.	/* Common/Ordinary Equity + Preferred  */
replace  steq=at-lt 			if steq==.	/* assets minus liabilities */

* compute book equity per share
* pstkrv: redemption value 
* pstkl:  liquidating value 
* pstk:   carrying value 
egen pfd=rowfirst(pstkrv pstkl pstk)
replace pfd=0 if pfd==.

* txditc: taxes and investment tax credit
egen txdbitcb=rowtotal(txdb itcb)	/* deferred taxes + investment tax credit; "rowtotal" treats missing as 0 */
egen dtax=rowfirst(txditc txdbitcb)

* Ken writes on his website: "Because of changes in the treatment of deferred taxes described in FASB 109, files produced after August 2016 no longer add Deferred Taxes and Investment 
* Tax Credit to BE for fiscal years ending in 1993 or later."
replace dtax=0 if year(datadate)>=1993

generate bookeq=(steq+dtax-pfd)

label var bookeq "Book value of equity (annual)"
/* ************************* */
* prepare data for tsset (pairs of gvkey-permcos must be unique)
duplicates tag gvkey permco datadate, gen(dup)
tabulate dup
drop if dup>0 & (index(linkprim, "J")>0)
drop dup
/* ******************* */
egen id=group(gvkey permco)

gen datem=mofd(datadate)
format datem %tm

tsset id datem
/* ************************* */
* Investment portfolios: Formed on the change in total assets from the FY ending in year t-2 to the FY ending in t-1, divided by t-2 total assets; using NYSE breakpoints
gen inv=(at/L12.at)-1			if at>0 & L12.at>0
/* ************************* */
gen nibt=F12.nib
/* ************************* */
* Operating profitability: Equals annual revenues minus COGS, interest expense, and SGA expenses divided by book equity for the last fiscal year end in t-1.
foreach var of varlist cogs xint xsga {
	replace `var'=0 if `var'==.
}
/* ************************* */
* Will need to merge with CRSP using permco/permno.  Sanity check: Are those pairs unique?
duplicates tag permco permno datadate, gen(dup)
tabulate dup
drop dup
* **********************
* Identify problematic observations with duplicate permco/permno/yr
generate   yr=year(datadate)
duplicates tag permco permno yr, gen(dup)
tabulate dup

* don't need to worry about duplicates if the accounting data is missing
egen available=rownonmiss(at sale bookeq inv)
drop if dup>0 & available==0
drop dup

* remaining duplicates seem to be caused by changes in FY.  When that happens, "inv" is missing (the lagged operator uses calendar units). Keep observtion with the fyr(t-1).  
duplicates tag permco permno yr, gen(dup)
tsset id datem
drop if dup==1 & fyr!=L12.fyr
drop dup

* sanity check:  make sure the previous step worked
duplicates tag permco permno yr, gen(dup)
tabulate dup
drop dup
************************
tabulate linkprim
tabulate linktype
keep conm gvkey permco permno datadate ipodate yr bookeq inv sale sps ib nib nibt eps mibt cogs xsga xint at street csho_adjex
************************
count if bookeq<.
************************
* add data on pensions from COMPUSTAT pension dataset (downloaded from wrds)
merge m:1 gvkey datadate using $WRDS/Compustat/PRBA.dta, keepusing(gvkey datadate prba)
drop if _merge==2
drop _merge

* Ken writes on his website: In August 2020, we removed the adjustment to book equity related to FASB Statement No. 106, Employers' Accounting for Postretirement Benefits Other Than Pensions, which was issued in 1990. This adjustment affects portfolios formed on book-to-market equity and portfolios formed on profitability, which is defined as operating income before depreciation and amortization minus interest expense scaled by book equity.
replace bookeq=bookeq-prba if prba!=. & datadate<=mdy(8,1,2020)

gen op=(sale-cogs-xsga-xint)/bookeq	if bookeq>0
replace mibt=0 if mibt==.
replace op=(sale-cogs-xsga-xint)/(bookeq+mibt)	if bookeq+mibt>0 & datadate>mdy(8,1,2018)
* Ken writes on his website: In August 2018, we have revised the method for computing Operating Profitability. We now include minority interest in the denominator, so the operating profitability ratio used to form portfolios in June of year t is annual revenues minus cost of goods sold, interest expense, and selling, general, and administrative expense divided by the sum of book equity and minority interest for the last fiscal year ending in t-1. 
/* ************************* */
* add data from davis -- "yr" is the formation year in davis
drop yr

gen yr=year(mdy(6 ,1,year(datadate)+1))
merge 1:1 permco permno yr using "`davis'"

sort permco yr
order permco yr book*

replace bookeq=book if bookeq==.

drop _merge book  
/* ************************* */
count if bookeq<.
************************
* "FF(1995): "we do not include firms until they are in COMPUSTATfor two years" (page 134)
sort permco permno yr
bys permco permno: gen count=_n
************************
generate date_book=mofd(mdy(12,1,year(datadate) ))
replace  date_book=mofd(mdy(12,1,yr-1))			if datadate==.

generate date_size=mofd(mdy(6,1,year(datadate)+1))
replace  date_size=mofd(mdy(6,1,yr))			if datadate==.

format date_book date_size %tm
save $Work/compustat, replace
/* ********************************************************************* */
* need to compute market cap for each permco (e.g. to compute BM).  Will need to restrict sample (only common stock; major stock exchanges)
import sas using $WRDS/CRSP/stocknames.sas7bdat, clear case(lower)
rename *, lower
*shrcd (1st digit): 
*1: Ordinary Common Shares
*2: Certificates
*3: ADRs (American Depository Receipts)
*4: SB1s (Shareg of Beneficial Interest)
*7: Units (Depogitory Unite, Units of Beneficial Interest, Unite of Limited Partnership Interest, Depository Receipts, etc.)

keep if (shrcd==10 | shrcd==11)
keep if (exchcd>=1 & exchcd<=3)

keep comnam permco permno cusip namedt nameenddt exchcd st_date
duplicates drop

* find end-of-the-month day for the last record of each stock
sort permco permno nameenddt
bys permco permno:  gen obs=_n
bys permco permno:  gen tot=_N
replace nameenddt=mdy(12,31,year(nameenddt)) 			if month(nameenddt)==12 & obs==tot
replace nameenddt=mdy(month(nameenddt)+1,1,year(nameenddt))-1	if month(nameenddt)<=11 & obs==tot
drop obs tot

tempfile names
save "`names'", replace
/* ********************************************************************* */
* compute market cap for the firm; msf_adjusted.dta generated by read_crsp.do
import sas $WRDS/CRSP/msf.sas7bdat, clear
rename *, lower
keep permco permno cusip date prc shrout cfacshr retx ret

gen mcap=abs(prc)*shrout
drop prc shrout
keep if mcap>0 &  mcap!=.
* ********
*[2] Restrict sample to common stock traded on major stock exchanges
joinby permno permco  using "`names'"
keep if date>=namedt & date<=nameenddt 

* deal with duplicates for permco/permno/date
drop namedt nameenddt 
duplicates drop

* a few observations (2,434 out of 3,660,551) are identical except that the security has different values for exchcd. Keep only one of each.
duplicates tag permco permno date ret, gen(dup)
tabulate dup
duplicates drop permco permno date ret, force

* Ken probably drops observations with missing returns
egen id=group(permco permno)
gen datem=mofd(date)
format datem %tm
format date %td

bys permco date: egen TotMcap=total(mcap)
tsset id datem
generate vwret1 =F1.ret *(mcap/TotMcap)
generate vwretx1=F1.retx*(mcap/TotMcap)

* [3] When firm has multiple securities, assign permno for security with the largest market cap @t
sort permco date mcap
collapse (lastnm) comnam permno exchcd st_date (sum) Mcap=mcap vwret1 vwretx1 (count) obs=vwret1 obsx=vwretx1, by(permco date)
replace vwret1 =. if obs ==0
replace vwretx1=. if obsx==0

format date %td
tempfile crsp
save "`crsp'", replace
* *******************************
*bm: The BE used in June of year t is the book equity for the last FY end in t-1. ME is price times shares outstanding at the end of December of t-1
use if month(date)==12 using "`crsp'", clear
rename Mcap mcap_book

gen date_book=mofd(date)
format date_book %tm
drop date exchcd
* *******************
merge 1:1 permco permno date_book using $Work/compustat
* must have mcap_book to compute bm
keep if _merge==3 
drop _merge

gen bm=bookeq/(mcap_book/1000)
save $Work/tmp, replace
/* ************************************************ */
*size:	The portfolios are constructed at the end of each June using the June market equity and NYSE breakpointsuse if month(date)==06 
use if month(date)==6 using "`crsp'", clear
rename Mcap mcap_size

gen date_size=mofd(date)
format date_size %tm
/* ************************************************ */
merge 1:1 permco permno date_size using $Work/tmp
drop if _merge==2
drop _merge
replace date_book=mofd(mdy(12,1,year(date)-1)) if date_book==.
/* ************************************************ */
* NOTE:  Dropping requirement that "count>1"; else no small growth in 1963:6-1964:6 

* Size x BM: The portfolios for July of year t to June of t+1 include all NYSE, AMEX, and NASDAQ stocks for which we have market equity data for December 
* of t-1 and June of t, and (positive) book equity data for t-1.
generate rSIZE=(mcap_size>0 & mcap_book>0) & (bookeq>0) & (bm+mcap_size+mcap_size+vwret1<.) 
generate rBOOK=(mcap_size>0 & mcap_book>0) & (bookeq>0) & (bm+mcap_size+mcap_size+vwret1<.) 

* Profitability:  All NYSE, AMEX, and NASDAQ stocks for which we have market equity data for June of t, (positive) book equity data for t-1, non-missing 
* revenues data for t-1, and non-missing data for at least one of the following: cost of goods sold, selling, general and administrative expenses, or interest
* expense for t-1
generate rPROF=(mcap_size>0) & (bookeq>0) & (mcap_size+bookeq+op+vwret1<.)  

*INV: All NYSE, AMEX, and NASDAQ stocks for which we have market equity data for June of t and total assets data for t-2 and t-1
generate rINVE=(mcap_size+inv+vwret1<.)  

bys date: astile SIZE = mcap_size if rSIZE==1, qc(exchcd==1) nq(10)
bys date: astile BOOK = bm 	  if rBOOK==1, qc(exchcd==1) nq(10)
bys date: astile PROF = op        if rPROF==1, qc(exchcd==1) nq(10)
bys date: astile INVE = inv       if rINVE==1, qc(exchcd==1) nq(10)
/* ************************************************ */
foreach var of varlist SIZE BOOK PROF INVE  {
	replace `var'=-99 if `var'==.
}	
replace BOOK=99	if bookeq<0
drop rSIZE rBOOK rPROF rINVE prba 
/* ************************************************ */
order datadate date date_* yr permco permno gvkey conm exchcd
format date %td
keep if year(date)>=1963
sort permco date
gen datem=mofd(date)

format datem %tm
save $Work/FamaFrench01, replace
/* ************************************************ */
!rm -f $Work/tmp.dta
